maReg <- function(select = "", print = TRUE) {
  # Fit witPerAut ~ TextPerAut on the subset of the global `autTrads`
  # matrix whose generation color (global `colors` vector) equals `select`,
  # then draw the regression line on the current plot in that color.
  #
  # select: a color name; used both to pick rows of `autTrads` (via the
  #         global `colors` vector) and as the color of the fitted line.
  # print:  if TRUE, print the model summary to the console.
  #
  # Returns the fitted lm object invisibly so callers can reuse it.
  #
  # drop = FALSE keeps the subset a matrix even when a single author
  # matches `select`; without it the row collapses to a plain vector and
  # as.data.frame() no longer yields the two named columns lm() needs.
  reg <- lm(witPerAut ~ TextPerAut,
            data = as.data.frame(autTrads[colors == select, , drop = FALSE]))
  if (isTRUE(print)) {
    print(summary(reg))
  }
  abline(reg, col = select)
  invisible(reg)
}

Troubadours’ texts and witnesses

Some distributions

  1. Distributions of the number of witnesses / texts / author, in relation.

  2. Chronological distributions ?

  3. Geographical distributions ?

Preprocessing

Texts and witnesses

# Remove vidas: only "author,text" repertory numbers contain a comma, so
# keeping comma-matching rows drops the prose vidas entries.
BeDT = BeDT[grep(",", BeDT[,"repertorio_n"]), ]
# Non-strictly-lyric pieces (roman-numeral sigla): keep them or not?
# Here they are dropped: remove any row whose repertory number contains
# a roman numeral (I, V, X).
# NOTE(review): drop = TRUE has no effect when keeping all columns of a
# data.frame — confirm whether drop = FALSE (or nothing) was intended.
BeDT = BeDT[grep("[IVX]+", BeDT[,"repertorio_n"], invert = TRUE), , drop = TRUE]
# Add generation information

Witness per author

  • anonyms removed;
# Split each "author,text" repertory number into a 2-column character
# matrix (columns "aut" and "text"); one row per witness occurrence.
autText = matrix(ncol = 2, dimnames = list(NULL, c("aut", "text")), unlist(strsplit(as.character(BeDT[,"repertorio_n"]), ",")), byrow = TRUE)
# Remove anonyms ("BEdT 461" is the anonymous-author siglum)
autText = autText[!autText[,1] == "BEdT 461", ]
# Each row of autText is one witness, so tabulating the author column
# yields the number of witnesses per author.
witPerAut = table(autText[,1])
plot(table(witPerAut), type = "h", col = "red", lwd = 10, main = "Distr. of witnesses per author", xlab = "number of witnesses", ylab = "Freqs", sub = paste("N = ", nrow(autText)))

summary(as.vector(witPerAut))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    1.00    4.00   32.57   14.00  759.00
head(sort(witPerAut, decreasing = TRUE))
## 
## BEdT 242 BEdT 167 BEdT 070 BEdT 364 BEdT 010 BEdT 335 
##      759      547      515      500      470      402

Petit top:

  • Guiraut de Borneill: 759 témoins;
  • Gaucelm Faidit: 547 témoins;
  • Bernart de Ventadorn: 515 témoins;
  • Peire Vidal: 500 témoins;
  • Aimeric de Pegulhan: 470 témoins;
  • Peire Cardenal: 402 témoins.

Text per author

# Collapse duplicate witnesses to one row per distinct (author, text)
# pair; tabulating the author column then counts texts per author.
autTextUniques = unique(autText)
TextPerAut = table(autTextUniques[,1])
plot(table(TextPerAut), type = "h", col = "red", lwd = 10, main = "Distr. of texts per author", xlab = "number of texts", ylab = "Freqs", sub = paste("N = ", nrow(autTextUniques)))

summary(as.vector(TextPerAut))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   6.661   5.000  91.000
head(sort(TextPerAut, decreasing = TRUE))
## 
##  BEdT 082 BEdT 434a  BEdT 248  BEdT 242  BEdT 246  BEdT 335 
##        91        84        83        79        77        71
troubTextPerAut = TextPerAut
troubautTextUniques = autTextUniques
  • (248, Guiraut Riquier: 98 -> disparaît car nombreux textes non lyriques)
  • 082, Bertran Carbonel: 91

Relationship nr. of text / nr. of wits per author

# Author-level table: texts per author and witnesses per author, side by
# side. NOTE(review): cbind relies on both tables sharing the same author
# order — they are built over the same author ids, but confirm.
autTrads = cbind(TextPerAut, witPerAut)
# set rownames so authors can be looked up by their BEdT sigla
rownames(BeDT_auts) = BeDT_auts[, "rep_n_aut"]
# Get generation information for each author present in autTrads
gens = BeDT_auts[rownames(autTrads),][, "gen"]
# Map the leading digit of the generation code to a plotting color;
# "white" flags authors whose generation code matches none of 1-6.
colors = rep("white", length(gens))
colors[grep("^1", gens)] = "blue"
colors[grep("^2", gens)] = "yellow"
colors[grep("^3", gens)] = "red"
colors[grep("^4", gens)] = "darkred"
colors[grep("^5", gens)] = "purple"
colors[grep("^6", gens)] = "black"

table(colors)
## colors
##   black    blue darkred  purple     red   white  yellow 
##      56       8     121      53      52      45      13
plot(table(colors))

Régression log(wits per auts) ~ log(TextPerAut)

plot(autTrads, log = "xy", col = colors, main ="Nombre de textes et de témoins par auteur", xlab = "N. textes", ylab = "N. témoins", sub = "plan log/log")
legend("topleft", legend = c("........-1150", "1150-1175", "1170-1210", "1190-1235","1230-1265", "1260-...."), fill = c("blue", "yellow", "red", "darkred", "purple", "black"), cex = 0.7)
# Power-law fit: a linear model in log/log space across all authors
# (slope > 1 means witnesses grow faster than texts).
reg = lm(log(witPerAut) ~ log(TextPerAut), data = as.data.frame(autTrads))
summary(reg)
## 
## Call:
## lm(formula = log(witPerAut) ~ log(TextPerAut), data = as.data.frame(autTrads))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8389 -0.5021 -0.1463  0.4765  2.1370 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.50208    0.04825   10.41   <2e-16 ***
## log(TextPerAut)  1.30438    0.03175   41.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7064 on 346 degrees of freedom
## Multiple R-squared:  0.8299, Adjusted R-squared:  0.8294 
## F-statistic:  1688 on 1 and 346 DF,  p-value: < 2.2e-16
abline(reg, col="red")

En différenciant par période

plot(autTrads, log = "xy", col = colors, main ="Nombre de textes et de témoins par auteur", xlab = "N. textes", ylab = "N. témoins", sub = "plan log/log")
legend("topleft", legend = c("........-1150", "1150-1175", "1170-1210", "1190-1235","1230-1265", "1260-...."), fill = c("blue", "yellow", "red", "darkred", "purple", "black"), cex = 0.7)
maReg(select = "blue")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.904  -5.177  -3.088   2.753  25.014 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.3714     5.6003  -0.423    0.687    
## TextPerAut    4.4796     0.3592  12.472 1.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.66 on 6 degrees of freedom
## Multiple R-squared:  0.9629, Adjusted R-squared:  0.9567 
## F-statistic: 155.6 on 1 and 6 DF,  p-value: 1.623e-05
maReg(select = "yellow")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -145.76  -33.08   18.49   23.49  141.72 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -26.0000    25.1432  -1.034    0.323    
## TextPerAut    9.5067     0.8816  10.784 3.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72.06 on 11 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.9057 
## F-statistic: 116.3 on 1 and 11 DF,  p-value: 3.46e-07
maReg(select = "red")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -124.909  -12.997    1.089    7.156  186.283 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.8194     8.2103  -1.074    0.288    
## TextPerAut    8.7307     0.4459  19.581   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.25 on 50 degrees of freedom
## Multiple R-squared:  0.8846, Adjusted R-squared:  0.8823 
## F-statistic: 383.4 on 1 and 50 DF,  p-value: < 2.2e-16
maReg(select = "darkred")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.758  -1.983   1.695   2.695 143.478 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3733     1.9958  -3.694 0.000334 ***
## TextPerAut    6.6779     0.1882  35.483  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.33 on 119 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.9129 
## F-statistic:  1259 on 1 and 119 DF,  p-value: < 2.2e-16
maReg(select = "purple")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.0837  -2.4037  -2.4037  -0.4037  25.3438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.4295     1.2622   1.133    0.263    
## TextPerAut    1.9742     0.0826  23.901   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.34 on 51 degrees of freedom
## Multiple R-squared:  0.918,  Adjusted R-squared:  0.9164 
## F-statistic: 571.2 on 1 and 51 DF,  p-value: < 2.2e-16
maReg(select = "black")

## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5243  -1.1876  -0.9030  -0.1106  28.7603 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.61844    1.06589    0.58    0.564    
## TextPerAut   1.28459    0.05374   23.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.333 on 54 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.912 
## F-statistic: 571.3 on 1 and 54 DF,  p-value: < 2.2e-16

Régression wits per auts ~ TextPerAut (moins bonne)

plot(autTrads, col = colors, main ="Nombre de textes et de témoins par auteur", xlab = "N. textes", ylab = "N. témoins")
legend("topleft", legend = c("........-1150", "1150-1175", "1170-1210", "1190-1235","1230-1265", "1260-...."), fill = c("blue", "yellow", "red", "darkred", "purple", "black"), cex = 0.7)
reg = lm(witPerAut ~ TextPerAut, data = as.data.frame(autTrads))
summary(reg)
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -337.43   -6.08   -3.03   -0.14  361.78 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.0111     3.2912  -0.307    0.759    
## TextPerAut    5.0409     0.2195  22.968   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 55.01 on 346 degrees of freedom
## Multiple R-squared:  0.6039, Adjusted R-squared:  0.6028 
## F-statistic: 527.5 on 1 and 346 DF,  p-value: < 2.2e-16
abline(reg, col="red")

Quels sont ces points aberrants ?

# Outliers visible in the scatter plot: authors with many texts (> 60)
# but comparatively few witnesses (< 300).
autTrads[
  autTrads[, "TextPerAut"] > 60 
  & autTrads[, "witPerAut"] < 300, 
]
##           TextPerAut witPerAut
## BEdT 082          91       160
## BEdT 246          77        81
## BEdT 248          83       136
## BEdT 434a         84        85
plot(autTrads, col = colors, main ="Nombre de textes et de témoins par auteur", xlab = "N. textes", ylab = "N. témoins")
legend("topleft", legend = c("........-1150", "1150-1175", "1170-1210", "1190-1235","1230-1265", "1260-...."), fill = c("blue", "yellow", "red", "darkred", "purple", "black"), cex = 0.7)
maReg(select = "blue")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.904  -5.177  -3.088   2.753  25.014 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.3714     5.6003  -0.423    0.687    
## TextPerAut    4.4796     0.3592  12.472 1.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.66 on 6 degrees of freedom
## Multiple R-squared:  0.9629, Adjusted R-squared:  0.9567 
## F-statistic: 155.6 on 1 and 6 DF,  p-value: 1.623e-05
maReg(select = "yellow")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -145.76  -33.08   18.49   23.49  141.72 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -26.0000    25.1432  -1.034    0.323    
## TextPerAut    9.5067     0.8816  10.784 3.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72.06 on 11 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.9057 
## F-statistic: 116.3 on 1 and 11 DF,  p-value: 3.46e-07
maReg(select = "red")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -124.909  -12.997    1.089    7.156  186.283 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.8194     8.2103  -1.074    0.288    
## TextPerAut    8.7307     0.4459  19.581   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.25 on 50 degrees of freedom
## Multiple R-squared:  0.8846, Adjusted R-squared:  0.8823 
## F-statistic: 383.4 on 1 and 50 DF,  p-value: < 2.2e-16
maReg(select = "darkred")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.758  -1.983   1.695   2.695 143.478 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3733     1.9958  -3.694 0.000334 ***
## TextPerAut    6.6779     0.1882  35.483  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.33 on 119 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.9129 
## F-statistic:  1259 on 1 and 119 DF,  p-value: < 2.2e-16
maReg(select = "purple")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.0837  -2.4037  -2.4037  -0.4037  25.3438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.4295     1.2622   1.133    0.263    
## TextPerAut    1.9742     0.0826  23.901   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.34 on 51 degrees of freedom
## Multiple R-squared:  0.918,  Adjusted R-squared:  0.9164 
## F-statistic: 571.2 on 1 and 51 DF,  p-value: < 2.2e-16
maReg(select = "black")

## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5243  -1.1876  -0.9030  -0.1106  28.7603 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.61844    1.06589    0.58    0.564    
## TextPerAut   1.28459    0.05374   23.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.333 on 54 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.912 
## F-statistic: 571.3 on 1 and 54 DF,  p-value: < 2.2e-16
  • 082: Bertran Carbonel
  • 246: Guillem de l’Olivier d’Arle
  • 248: Guiraut Riquier
  • 434a: Cerveri de Girona 2 (tous attestés dans R ou Sg ?)

-> question des attributions douteuses (sigle d’auteur à lettre) et question des textes non lyriques (sigles à lettre).

La rég. sans eux,

# plot(autTrads, col = colors)
# autTrads2 = autTrads[!rownames(autTrads) %in% c("BEdT 082", "BEdT 246", "BEdT 248", "BEdT 434a"), ]
# reg = lm(witPerAut ~ TextPerAut, data = as.data.frame(autTrads2))
# summary(reg)
# abline(reg, col="red")

-> assez logique que soient tardifs. Effet goulot d’étranglement fait que petit nombre de textes passent. Mais, quand ils ont survécu, ils peuvent être beaucoup copiés. Alors que, quand plus récent, on peut avoir plus de textes avec peu de copies (moins filtré).

Same regression, but merging before 1150 and 1150-1175 (too small)

colors[colors == "blue"] = "yellow"
plot(autTrads, col = colors, main ="D", xlab = "N. textes", ylab = "N. témoins")
legend("topleft", legend = c("........-1175", "1170-1210", "1190-1235","1230-1265", "1260-...."), fill = c("yellow", "red", "darkred", "purple", "black"), cex = 0.7)
maReg(select = c("yellow"))
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -154.56  -26.02   15.14   23.86  168.58 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -25.7231    18.3907  -1.399    0.178    
## TextPerAut    8.8606     0.7532  11.764 3.62e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.35 on 19 degrees of freedom
## Multiple R-squared:  0.8793, Adjusted R-squared:  0.8729 
## F-statistic: 138.4 on 1 and 19 DF,  p-value: 3.62e-10
maReg(select = "red")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -124.909  -12.997    1.089    7.156  186.283 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.8194     8.2103  -1.074    0.288    
## TextPerAut    8.7307     0.4459  19.581   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.25 on 50 degrees of freedom
## Multiple R-squared:  0.8846, Adjusted R-squared:  0.8823 
## F-statistic: 383.4 on 1 and 50 DF,  p-value: < 2.2e-16
maReg(select = "darkred")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.758  -1.983   1.695   2.695 143.478 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3733     1.9958  -3.694 0.000334 ***
## TextPerAut    6.6779     0.1882  35.483  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.33 on 119 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.9129 
## F-statistic:  1259 on 1 and 119 DF,  p-value: < 2.2e-16
maReg(select = "purple")
## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.0837  -2.4037  -2.4037  -0.4037  25.3438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.4295     1.2622   1.133    0.263    
## TextPerAut    1.9742     0.0826  23.901   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.34 on 51 degrees of freedom
## Multiple R-squared:  0.918,  Adjusted R-squared:  0.9164 
## F-statistic: 571.2 on 1 and 51 DF,  p-value: < 2.2e-16
maReg(select = "black")

## 
## Call:
## lm(formula = witPerAut ~ TextPerAut, data = as.data.frame(autTrads[colors == 
##     select, ]))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5243  -1.1876  -0.9030  -0.1106  28.7603 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.61844    1.06589    0.58    0.564    
## TextPerAut   1.28459    0.05374   23.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.333 on 54 degrees of freedom
## Multiple R-squared:  0.9136, Adjusted R-squared:  0.912 
## F-statistic: 571.3 on 1 and 54 DF,  p-value: < 2.2e-16

Witness per text

# get the frequencies of the texts (number of witnesses per text)
textsFreqs = table(as.character(BeDT[,"repertorio_n"]))
#View(sort(textsFreqs, decreasing = TRUE))
textsFreqs = as.data.frame(textsFreqs)
# plot it
#plot(textsFreqs[,2])
#barplot(textsFreqs[,2])
# hist(textsFreqs[,2], breaks = seq(min(textsFreqs[,2])-0.5, max(textsFreqs[,2])+0.5, by=1), main = "Distribution of witnesses per text", xlab = "number of witnesses", include.lowest = TRUE)
# or, better
plot(table(textsFreqs[,2]), type = "h", col = "red", lwd = 10, main = "Distr. of witnesses per troubadour text", xlab = "number of witnesses", ylab = "Freqs", sub = paste("N = ", length(textsFreqs[,2])))

#boxplot(textsFreqs[,2])
#summary(textsFreqs[,2])
# geometric mean of witnesses per text (exp of the mean log count)
exp(mean(log(textsFreqs[,2])))
## [1] 2.715444
# frequencies for each numeric value (how many texts have k witnesses)
numFreqs = table(textsFreqs[,2])
numFreqs = as.data.frame(numFreqs)
# coerce both columns to numeric for the log/log plot
# NOTE(review): sapply returns a matrix here; Var1 is a factor, so
# as.numeric yields level indices — fine for counts 1..max, but verify.
numFreqs = sapply(numFreqs, as.numeric)
plot(numFreqs[,1], numFreqs[,2], log = "xy", main = "Distr. of wits per troubadour text - logarithmic scale", xlab = "number of witnesses", ylab = "freqs")

Petit florilège:

head(textsFreqs[order(textsFreqs[,2], decreasing = TRUE), ])
##              Var1 Freq
## 1747 BEdT 364,039   28
## 660  BEdT 155,001   26
## 675  BEdT 155,016   26
## 678  BEdT 155,021   25
## 673  BEdT 155,014   24
## 763  BEdT 167,059   24
# save
troubtextsFreqs = textsFreqs

Chronological distributions

  • Texts according to date of authors vs. witness according to date of ms.;
  • nr. of texts per author according to date;
  • nr. of witness per text according to date of author?

Authors per period

# Relabel generation codes with human-readable period ranges.
# NOTE(review): the initial sub() keeps only the leading digit, but every
# digit 1-6 (and 9/0/a) is overwritten by the greps below, so it only
# affects codes matching none of those patterns — possibly redundant.
gensClean = sub("^(\\d).*$", "\\1", gens)
gensClean[grep(pattern = "^1", gens)] = "-1150"
gensClean[grep(pattern = "^2", gens)] = "1150-1175"
gensClean[grep(pattern = "^3", gens)] = "1170-1210"
gensClean[grep(pattern = "^4", gens)] = "1190-1235"
gensClean[grep(pattern = "^5", gens)] = "1230-1265"
gensClean[grep(pattern = "^6", gens)] = "1260-"
# codes starting with 9, 0 or a mark unknown/uncertain generations
gensClean[grep(pattern = "^(9|0|a)", gens)] = "?"
plot(as.factor(gensClean), main="Nombre d'auteurs par génération", sub = "source: BeDT")

Texts per period

# add gen to autText: append two copies of the author's generation code
# ("gen" is relabelled below; "indic" keeps the raw code)
#autText = autText
autTextGen = cbind(autText, as.character(BeDT_auts[autText[, "aut"], ][, "gen"]), as.character(BeDT_auts[autText[, "aut"], ][, "gen"]))
colnames(autTextGen)[3:4] = c("gen", "indic")
# Same digit-to-period mapping as for gensClean above
autTextGen[, "gen"][grep("^1", autTextGen[, "gen"])] = "-1150"
autTextGen[, "gen"][grep("^2", autTextGen[, "gen"])] = "1150-1175"
autTextGen[, "gen"][grep("^3", autTextGen[, "gen"])] = "1170-1210"
autTextGen[, "gen"][grep("^4", autTextGen[, "gen"])] = "1190-1235"
autTextGen[, "gen"][grep("^5", autTextGen[, "gen"])] = "1230-1265"
autTextGen[, "gen"][grep("^6", autTextGen[, "gen"])] = "1260-"
autTextGen[, "gen"][grep("^(0|9|a)", autTextGen[, "gen"])] = "?"

plot(as.factor(autTextGen[, "gen"]), main="Nombre de textes par génération\nsource: BeDT", las = 2)

Witnesses per period

# Get sigla: strip the "p_°^..._" wrapper around each manuscript siglum
sigla = as.factor(gsub("p\\_\\°?\\^?([A-Za-z0-9]+)\\_.*$", "\\1", BeDT[,"SIGLA"]))
# Normalize variant spellings of sigla to canonical level names
levels(sigla)[grep("omega", levels(sigla))] = "omega"
levels(sigla)[grep("psi", levels(sigla))] = "psi"
levels(sigla)[grep("eta", levels(sigla))] = "eta"
levels(sigla)[grep("Bamb", levels(sigla))] = "bamberg136"
levels(sigla)[grep("BAV, PL", levels(sigla))] = "PalLat753"
levels(sigla)[grep("BAV, BL", levels(sigla))] = "BarbLat3953"
levels(sigla)[grep("Barc - 239", levels(sigla))] = "Barc239"
levels(sigla)[grep("Barc - 850", levels(sigla))] = "Barc850"
levels(sigla)[grep("Str.App.8", levels(sigla))] = "StrApp8"
levels(sigla)[grep("Nü - II.77", levels(sigla))] = "NurnbergII77"
levels(sigla)[grep("Mü", levels(sigla))] = "MunchenLat759"
# NOTE(review): pattern says "Harl - 4041" but the replacement says
# "Harley3041" — 4041 vs 3041 looks like a typo; confirm which is right.
levels(sigla)[grep("Harl - 4041", levels(sigla))] = "Harley3041"
levels(sigla)[grep("MI - D.55_0001", levels(sigla))] = "MilanoD55sup"
#write.csv("sigla.csv", x = levels(sigla))


# Attach the cleaned siglum of each witness as a new BeDT column
BeDT = cbind(BeDT, sigla)

Evolving diversity per period

Authors as sites, texts as species, witnesses as individuals

# Vegan can also estimate series of Rényi and Tsallis diversities. Rényi diversity of order a (Hill, 1973) -> TODO: look into this index
library("vegan")
## Le chargement a nécessité le package : permute
## Le chargement a nécessité le package : lattice
## This is vegan 2.6-8
# Global, texts as species, witnesses as individuals
vegan::diversity(textsFreqs[,2], index = "shannon")
## [1] 7.336752
# Evolution through time
# First, building data
# (and cheating a bit since text 001 is actually not the same for author A and B, but here we do not really care)
# Author x text abundance matrix: cell (a, t) holds the number of
# witnesses in autText pairing author a with text t (authors as "sites",
# texts as "species", witnesses as "individuals" for vegan).
authors <- sort(unique(autText[, 1]))
texts <- sort(unique(autText[, 2]))
# Cross-tabulate the two columns in one vectorized step; fixing the
# factor levels guarantees the same sorted row/column order the original
# O(authors x texts) double loop of nrow() scans produced.
TextsByAuts <- unclass(table(
  factor(autText[, 1], levels = authors),
  factor(autText[, 2], levels = texts)
))
dimnames(TextsByAuts) <- list(authors, texts)
# And now, global diversity
TextsByAuts_diversities = vegan::diversity(TextsByAuts, index = "shannon")
TextsByAuts_diversities
##  BEdT 001  BEdT 002  BEdT 003  BEdT 005 BEdT 006a  BEdT 007  BEdT 008  BEdT 009 
## 0.0000000 0.6931472 1.4516196 0.9556999 0.0000000 0.0000000 0.0000000 2.6045291 
##  BEdT 010  BEdT 011  BEdT 012 BEdT 012b  BEdT 015  BEdT 016 BEdT 016a BEdT 016b 
## 3.7129676 0.7757145 0.0000000 0.0000000 0.0000000 2.6664401 0.0000000 0.0000000 
##  BEdT 017  BEdT 019  BEdT 020  BEdT 021  BEdT 022  BEdT 023  BEdT 025  BEdT 026 
## 0.6365142 0.0000000 0.0000000 0.6931472 0.0000000 0.0000000 0.0000000 0.6365142 
##  BEdT 027  BEdT 028  BEdT 029  BEdT 030  BEdT 031  BEdT 032  BEdT 034  BEdT 040 
## 1.5746794 0.0000000 2.7363028 2.9178294 0.0000000 0.0000000 1.0397208 0.0000000 
##  BEdT 041  BEdT 043  BEdT 044  BEdT 046  BEdT 047  BEdT 048  BEdT 049  BEdT 050 
## 0.0000000 0.0000000 0.0000000 1.3204191 2.4042683 0.0000000 0.0000000 0.6931472 
##  BEdT 052  BEdT 053  BEdT 054  BEdT 055  BEdT 056  BEdT 057  BEdT 058  BEdT 059 
## 0.9002561 0.0000000 0.0000000 0.0000000 0.0000000 1.3862944 0.6730117 0.0000000 
##  BEdT 062  BEdT 063  BEdT 065  BEdT 066  BEdT 067  BEdT 068  BEdT 069  BEdT 070 
## 0.0000000 2.0981474 1.0549202 1.3296613 0.0000000 0.0000000 1.0986123 3.5915838 
##  BEdT 071  BEdT 074  BEdT 075  BEdT 076  BEdT 077  BEdT 079  BEdT 080  BEdT 081 
## 1.2770343 2.8371552 1.0397208 2.6477935 1.0986123 0.0000000 3.6141703 0.2868360 
##  BEdT 082  BEdT 083  BEdT 084  BEdT 085  BEdT 087  BEdT 088  BEdT 092  BEdT 094 
## 4.4184690 0.0000000 0.4505612 0.0000000 0.6829081 0.6869616 0.0000000 0.0000000 
##  BEdT 095  BEdT 096  BEdT 097  BEdT 098  BEdT 101  BEdT 102  BEdT 103  BEdT 104 
## 1.0397208 2.0870405 1.5931996 0.6730117 2.9104806 1.0397208 1.0986123 0.6931472 
##  BEdT 106  BEdT 107  BEdT 109  BEdT 111  BEdT 112  BEdT 114  BEdT 115  BEdT 119 
## 2.9271868 0.0000000 1.0986123 0.0000000 1.8181418 0.0000000 0.0000000 1.8466136 
##  BEdT 120  BEdT 121  BEdT 123  BEdT 124  BEdT 126  BEdT 129  BEdT 132  BEdT 133 
## 0.0000000 0.6931472 0.0000000 2.7687690 0.6931472 0.6931472 2.4610366 2.4978673 
##  BEdT 134  BEdT 136  BEdT 137  BEdT 138  BEdT 139  BEdT 140  BEdT 142  BEdT 143 
## 0.6365142 1.0549202 0.0000000 0.0000000 0.0000000 0.6931472 0.9075353 0.0000000 
##  BEdT 144  BEdT 145  BEdT 149 BEdT 150a  BEdT 151  BEdT 154  BEdT 155  BEdT 156 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 2.1383331 2.9963445 2.2318462 
##  BEdT 157  BEdT 158  BEdT 159  BEdT 160  BEdT 162  BEdT 163  BEdT 166  BEdT 167 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.9556999 0.0000000 0.0000000 3.7859138 
## BEdT 167a  BEdT 168  BEdT 171  BEdT 172  BEdT 173  BEdT 174  BEdT 177  BEdT 178 
## 0.0000000 0.2868360 0.0000000 0.0000000 2.4410353 2.3025851 0.0000000 0.0000000 
##  BEdT 180  BEdT 181  BEdT 183  BEdT 184  BEdT 185  BEdT 186  BEdT 187  BEdT 189 
## 0.0000000 0.0000000 2.2141814 0.3046361 0.5004024 0.0000000 0.0000000 1.4377014 
##  BEdT 190  BEdT 192  BEdT 193  BEdT 194  BEdT 197  BEdT 198  BEdT 201  BEdT 202 
## 0.0000000 1.5691529 0.0000000 2.5966682 1.4270610 0.0000000 0.9502705 2.1713001 
##  BEdT 203  BEdT 204  BEdT 205  BEdT 206  BEdT 208  BEdT 209  BEdT 210  BEdT 211 
## 0.0000000 1.3862944 1.9833652 1.1537419 0.0000000 0.9556999 2.9989108 0.0000000 
##  BEdT 213  BEdT 214  BEdT 215  BEdT 216  BEdT 217  BEdT 218  BEdT 219  BEdT 220 
## 1.9469845 0.6365142 0.0000000 0.6931472 2.1570016 0.0000000 0.0000000 0.0000000 
##  BEdT 223  BEdT 225  BEdT 226  BEdT 227  BEdT 229  BEdT 230  BEdT 231  BEdT 233 
## 1.7530369 2.2392829 1.3862944 2.1192809 1.3321790 1.3862944 1.4177980 1.2711815 
##  BEdT 234  BEdT 235  BEdT 236  BEdT 237  BEdT 238  BEdT 240  BEdT 241  BEdT 242 
## 2.4576439 0.6931472 2.3665290 0.0000000 1.0556334 1.7161228 0.0000000 4.1456408 
## BEdT 242a  BEdT 243  BEdT 244  BEdT 245  BEdT 246  BEdT 248  BEdT 249 BEdT 249a 
## 0.0000000 2.2588387 2.6701199 0.6869616 4.3195303 4.3724078 1.3451529 0.0000000 
##  BEdT 252  BEdT 253  BEdT 254  BEdT 256  BEdT 257  BEdT 258  BEdT 259  BEdT 261 
## 0.0000000 0.0000000 0.6931472 0.0000000 0.0000000 0.0000000 0.6931472 0.0000000 
##  BEdT 262  BEdT 265  BEdT 266  BEdT 267  BEdT 269  BEdT 270  BEdT 273  BEdT 276 
## 1.6165953 1.0986123 2.3978953 0.0000000 0.0000000 0.0000000 0.6277053 0.0000000 
##  BEdT 280  BEdT 281  BEdT 282  BEdT 283  BEdT 284  BEdT 285  BEdT 286  BEdT 288 
## 0.0000000 2.0024218 3.2286988 0.6365142 0.0000000 0.0000000 0.0000000 0.0000000 
##  BEdT 289  BEdT 290  BEdT 292  BEdT 293 BEdT 293a  BEdT 294  BEdT 295  BEdT 296 
## 1.3862944 0.6931472 0.0000000 3.5309692 0.0000000 0.6931472 0.0000000 0.6931472 
##  BEdT 297  BEdT 298  BEdT 299  BEdT 302  BEdT 304  BEdT 305  BEdT 306  BEdT 307 
## 0.0000000 0.0000000 0.0000000 0.0000000 1.0986123 2.5515221 1.0397208 0.0000000 
##  BEdT 308  BEdT 309  BEdT 310  BEdT 311  BEdT 312  BEdT 313  BEdT 314  BEdT 315 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.5595812 
##  BEdT 317  BEdT 319  BEdT 320 BEdT 322a BEdT 322b  BEdT 323  BEdT 325  BEdT 326 
## 0.0000000 2.0431919 0.0000000 0.0000000 0.0000000 3.0095082 0.0000000 0.0000000 
##  BEdT 327  BEdT 328  BEdT 329  BEdT 330  BEdT 331  BEdT 332  BEdT 334  BEdT 335 
## 0.0000000 0.0000000 0.0000000 2.7384872 0.6210864 0.6765260 0.0000000 4.1203130 
##  BEdT 336 BEdT 336a  BEdT 337  BEdT 338  BEdT 339  BEdT 340  BEdT 342  BEdT 343 
## 0.0000000 0.6931472 0.0000000 0.0000000 1.0986123 0.0000000 1.0986123 0.0000000 
##  BEdT 344  BEdT 345  BEdT 346  BEdT 349  BEdT 350  BEdT 352  BEdT 353  BEdT 355 
## 1.4921078 0.0000000 0.0000000 2.0242775 0.0000000 1.0789922 0.0000000 2.6019490 
##  BEdT 356  BEdT 357  BEdT 358  BEdT 359  BEdT 361  BEdT 362  BEdT 363  BEdT 364 
## 2.1128460 0.0000000 0.0000000 0.0000000 0.0000000 0.6365142 0.0000000 3.6396266 
##  BEdT 365  BEdT 366  BEdT 367  BEdT 369  BEdT 370  BEdT 371  BEdT 372  BEdT 374 
## 0.0000000 3.3120449 0.0000000 0.0000000 1.9319363 0.0000000 2.0702839 0.6730117 
##  BEdT 375  BEdT 376  BEdT 377  BEdT 379  BEdT 380  BEdT 381  BEdT 384  BEdT 386 
## 3.0116480 0.2573186 1.8762736 0.3488321 0.0000000 0.6931472 0.0000000 1.6094379 
##  BEdT 388  BEdT 389  BEdT 390  BEdT 391  BEdT 392  BEdT 393  BEdT 394  BEdT 395 
## 0.6931472 3.3757948 0.0000000 0.0000000 3.2438491 0.0000000 0.0000000 0.0000000 
##  BEdT 396  BEdT 397  BEdT 398  BEdT 401  BEdT 404  BEdT 405  BEdT 406  BEdT 407 
## 1.3497924 0.6901857 0.0000000 2.1639557 2.1706615 0.0000000 3.5430548 0.0000000 
##  BEdT 409  BEdT 410  BEdT 411 BEdT 413a  BEdT 414  BEdT 415  BEdT 416  BEdT 418 
## 1.5247074 1.7917595 0.6931472 0.0000000 0.0000000 0.0000000 1.4648164 0.5623351 
##  BEdT 419  BEdT 420  BEdT 421  BEdT 422  BEdT 424  BEdT 425  BEdT 427  BEdT 428 
## 0.0000000 0.6365142 2.1648313 0.6791933 0.0000000 0.0000000 2.0794415 0.0000000 
##  BEdT 430  BEdT 432  BEdT 434 BEdT 434a  BEdT 435  BEdT 436  BEdT 437  BEdT 438 
## 0.0000000 0.2237181 3.2571438 4.4263419 0.0000000 1.5498260 3.3549985 0.0000000 
##  BEdT 439  BEdT 440  BEdT 441  BEdT 442  BEdT 443  BEdT 446  BEdT 447  BEdT 448 
## 0.0000000 0.0000000 0.0000000 0.5623351 1.8121428 0.6931472 0.0000000 0.6108643 
##  BEdT 449  BEdT 450  BEdT 451  BEdT 452  BEdT 453  BEdT 454  BEdT 455  BEdT 456 
## 1.2364771 1.7736351 0.6365142 0.0000000 0.0000000 0.6365142 0.0000000 0.8599673 
##  BEdT 457  BEdT 458  BEdT 459  BEdT 460 
## 3.1981854 0.0000000 0.0000000 0.0000000
# And distributions and means per period
uniqueGens = sort(unique(autTextGen[,"gen"]))
# drop the unknown-generation bucket
uniqueGens = uniqueGens[!uniqueGens %in% "?"]
# one (author, generation) row per author
autGens = unique(autTextGen[, c(1,3)])
summaries = list()
# 2 x 3 grid: one diversity histogram per generation
layout(matrix(c(1,2,3,4,5,6), 2, 3, byrow = TRUE))
for(i in 1:length(uniqueGens)){
  # authors belonging to this generation
  auts = autGens[autGens[, 2] == uniqueGens[i],][, 1]
  # Shannon diversity of each of those authors' text/witness profiles
  myDiv = vegan::diversity(TextsByAuts[auts,])
  summaries[[uniqueGens[i]]] = summary(myDiv)
  hist(myDiv, sub=uniqueGens[i])
}

# Shannon diversity vs. sample size (number of witnesses per author)
AutsDivsAndSample = merge(as.matrix(TextsByAuts_diversities),
                          as.matrix(witPerAut),
                          by = "row.names", all.x = TRUE)
# The first merge column holds the author ids: promote it to row names,
# then keep the two value columns with witnesses first, diversity second
row.names(AutsDivsAndSample) = AutsDivsAndSample[, 1]
AutsDivsAndSample = AutsDivsAndSample[, c(3, 2)]
colnames(AutsDivsAndSample) = c("N. wits", "Shannon Diversity")
plot(AutsDivsAndSample)

Generations as sites, texts as species, witnesses as individuals

# vegan can also estimate series of Renyi and Tsallis diversities; Renyi
# diversity of order a is (Hill, 1973) -> TODO: look into this index
library("vegan")
# Evolution through time
# First, build the data: one row per generation (site), one column per text
# (species), identified by the "author,text" pair; cells = witness counts
uniqueTexts = sort(unique(paste(autTextGen[,1], ',', autTextGen[,2], sep = '')))
TextsByGen = matrix(nrow = length(uniqueGens), ncol = length(uniqueTexts), dimnames = list(uniqueGens, uniqueTexts), data = 0)
for(i in seq_along(uniqueGens)){
  # Hoisted: this subset used to be recomputed twice per iteration
  thisGen = autTextGen[autTextGen[, "gen"] == uniqueGens[i], ]
  thisGenTexts = table(paste(thisGen[,1], ',', thisGen[,2], sep = ""))
  # Vectorized assignment replaces the former element-by-element inner loop
  TextsByGen[uniqueGens[i], names(thisGenTexts)] = thisGenTexts
}
# And now, global diversity: one Shannon index per generation
# (rows of TextsByGen = generations as sites)
TextGens_diversities = vegan::diversity(TextsByGen, index = "shannon")
TextGens_diversities
##     -1150 1150-1175 1170-1210 1190-1235 1230-1265     1260- 
##  4.041343  5.135258  6.019933  6.052840  5.538136  5.984030
# Plot the evolution of diversity across the six generations; the x axis is
# relabelled below with the generation names
plot(TextGens_diversities, type="b", sub = paste("Source = BedT -- N. wits = ", sum(TextsByGen)), xlab = "Generations", xaxt="n", ylab="Shannon div.", main = "Generations as sites, texts as species, witnesses as individuals")
axis(1, at=1:6, labels=uniqueGens)

Generations as sites, authors as species, texts as individuals

# Global, authors as species, texts as individuals (generations as sites)
vegan::diversity(table(autTextGen[,1]), index = "shannon")
## [1] 4.333927
# By generation now
# First, create the necessary clean table: one row per generation,
# one column per author, cells = number of texts by that author
uniqueAuts = unique(autTextGen[,1])
AuthorsByGen = matrix(nrow = length(uniqueGens), ncol = length(uniqueAuts), dimnames = list(uniqueGens, uniqueAuts), data = 0)
for(i in seq_along(uniqueGens)){
  thisGenAuts = table(autTextGen[autTextGen[, "gen"] == uniqueGens[i], ][,1])
  # Vectorized assignment replaces the former element-by-element inner loop
  AuthorsByGen[uniqueGens[i], names(thisGenAuts)] = thisGenAuts
}
AutGens_diversities = vegan::diversity(AuthorsByGen, index = "shannon")
plot(AutGens_diversities, type="b", sub = paste("Source = BedT -- N. wits = ", sum(AuthorsByGen)), xlab = "Generations", xaxt="n", ylab="Shannon div.")
axis(1, at=1:6, labels=uniqueGens)

But the Shannon measure is sensitive to sample size. As an alternative, we can use rarefaction (cf. the vegan vignette, https://cran.r-project.org/web/packages/vegan/vignettes/diversity-vegan.pdf)

vegan::rarefy(AuthorsByGen, min(rowSums(AuthorsByGen)))
##     -1150 1150-1175 1170-1210 1190-1235 1230-1265     1260- 
##   8.00000  10.55774  32.95985  56.26795  38.46886  42.72285 
## attr(,"Subsample")
## [1] 317
plot(vegan::rarefy(AuthorsByGen, min(rowSums(AuthorsByGen))), type="b")

Lost works

Some stats on the lost works.

Chansonniers en France au XVI-XVIII

# Extant and attested: q, S, Y, W, I, C, gamma, B, Z, E, f, delta, T, X, R, n, p
# Extant, not attested: Ch, G?, psi
# Lost: 4
# Counts of chansonniers by survival/attestation status
statusCounts = c(17, 3, 4, 0)
statusLabels = c("cons. et att.", "cons., non att.?",
                 "perdus et att.", "perdus non att. ?")
data = matrix(statusCounts,
              ncol = 1,
              dimnames = list(statusLabels, "freq"))
barplot(data, beside = TRUE, names.arg = rownames(data), las = 2)

# Loss rate: lost-and-attested chansonniers over the whole corpus, in percent
tauxPerte = round(data[3, ] / sum(data) * 100)

library(ggplot2)
data = as.data.frame(data)
# Same barplot, ggplot flavour; the loss rate goes into the x-axis label
p = ggplot(data = data, aes(x = rownames(data), y = data[, 1])) +
  geom_bar(stat = "identity") +
  xlab(paste("pertes > ", tauxPerte, "%")) +
  ylab("Fréq.") +
  ggtitle("Chans. en Fr. XVI-XVIII")
p

Power-law probability distribution (Pareto type)

Use probability mass instead of density, because the variable is discrete (or, alternatively, account for the partial nature of witnesses, e.g. a witness with half of the text, or 0.65 of the text, etc. For now, a fragment of 2 verses counts as a witness in the same way as a complete manuscript of 2000 verses — but no data is available for this).

Selon wikipedia en, la formule de densité est \(P(X > x) \sim L(x)x^{-(\alpha+1)}\), où \(\alpha > 0\) et \(L(x)\) is a slowly varying function, qui contrôle la forme finie de la queue. Si \(L(x)\) est une constante, alors on a vraiment une loi de puissance.

La plupart du temps, on doit fixer une valeur minimale \(x_{min}\) à partir de laquelle la loi vaut (mais pas avant).

# and now density
#plot(density(textsFreqs[,2]), xlim = c(1,30), xaxs = "i")
# or rather, mass
plot(prop.table(table(textsFreqs[,2])), xlab = "n. witn.", ylab = "mass", main = "Troubadour Poems")
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

# la même chose en log
plot(prop.table(table(textsFreqs[,2])), xlab = "n. witn.", ylab = "mass", xlim = c(1,30), ylim = c(0.01, 0.4), log = "xy", main = "Troubadour Poems", sub = "log/log plot")
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

Estimation with regression

# Frequency table of witness counts: column 1 = number of witnesses (numeric),
# column 2 = number of texts with that many witnesses.
# table() is now computed once (was computed twice), and names() replaces the
# more obscure labels(...)[[1]] — identical for a one-dimensional table.
witTable = table(textsFreqs[,2])
data = cbind(as.numeric(names(witTable)), witTable)

Cela semble s’appliquer aussi aux témoins de chansons de geste,

plot(prop.table(RepTrad[,1]), xlab = "n. witn.", ylab = "mass", main = "Chansons de geste")
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

# la même chose en log
plot(prop.table(RepTrad[,1]), xlab = "n. witn.", ylab = "mass", xlim = c(1,30), ylim = c(0.01, 0.4), log = "xy", main = "Chansons de geste", sub = "log/log plot")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 12 y values <= 0 omitted from
## logarithmic plot
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

NB: Sauf qu’en fait, la constante au pif que j’ai utilisée devrait plutôt être une constante normalisatrice, telle que \(p(x) = \frac{\alpha - 1}{x_{min}} \left(\frac{x}{x_{min}}\right)^{-\alpha}\). En fait, je retombais vaguement sur cela empiriquement, vu que je supposais que \(x_{min}\) était 1 et que je donnais des valeurs voisines à la constante et \(\alpha - 1\). Donc passer de 1.3 à 1.4.

avec poweRlaw

Maintenant, on peut aussi essayer de trouver un fit via un algorithme dédié, par exemple via le module poweRlaw.

À voir: que fait le plot de poweRlaw exactement ? Il trace des CDF (cumulative distribution functions). Elles renvoient à l’équation 3.9 dans Clauset et al. (2009)

troub.pl = poweRlaw::displ$new(textsFreqs[,2])
troub.pl$setXmin(1)
(est = poweRlaw::estimate_pars(troub.pl))
## $pars
## [1] 1.680934
## 
## $value
## [1] 6097.674
## 
## $counts
## function gradient 
##        7        7 
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
## 
## attr(,"class")
## [1] "estimate_pars"
geste.pl = poweRlaw::displ$new(RepTradData[,1])
geste.pl$setXmin(1)
(est = poweRlaw::estimate_pars(geste.pl))
## $pars
## [1] 1.765307
## 
## $value
## [1] 442.4034
## 
## $counts
## function gradient 
##        7        7 
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
## 
## attr(,"class")
## [1] "estimate_pars"

Ce qui voudrait dire \(\alpha\) à 1,69 pour les troubadours et 1,76 pour les chansons de geste, avec \(x_{min} = 1\).

Mais on peut aussi chercher à optimiser \(x_{min}\):

(est1 = poweRlaw::estimate_xmin(troub.pl))
## $gof
## [1] 0.06748236
## 
## $xmin
## [1] 23
## 
## $pars
## [1] 11.04859
## 
## $ntail
## [1] 11
## 
## $distance
## [1] "ks"
## 
## attr(,"class")
## [1] "estimate_xmin"
(est2 = poweRlaw::estimate_xmin(geste.pl))
## $gof
## [1] 0.07572512
## 
## $xmin
## [1] 6
## 
## $pars
## [1] 2.954922
## 
## $ntail
## [1] 38
## 
## $distance
## [1] "ks"
## 
## attr(,"class")
## [1] "estimate_xmin"

Cela serait 23 pour les troubadours et 6 pour les chansons de geste. À partir de là, les valeurs d’\(\alpha\) seraient 11.0485886 et 2.9549223.

# Refit the troubadour power law with the optimised xmin and alpha,
# then plot the empirical CDF with the fitted line overlaid
troub.pl$setXmin(est1$xmin)
troub.pl$setPars(est1$pars)
poweRlaw::plot(troub.pl)
poweRlaw::lines(troub.pl, col = 2)

# Not great; so try again with xmin fixed at 1 and the earlier MLE alpha
troub.pl$setXmin(1)
troub.pl$setPars(1.68)
poweRlaw::plot(troub.pl)
poweRlaw::lines(troub.pl, col = 2)

Pour être sûr qu’on soit bien en présence d’une loi de puissance, on peut tester la procédure fondée sur Clauset et al.

library("poweRlaw") # Needed to avoid a bug. Badly declared namespace in the package?
# Goodness-of-fit test (Clauset et al. procedure)
bs_p = poweRlaw::bootstrap_p(troub.pl, no_of_sims=100, threads=2)# sims reduced from 1000 to 100 to keep execution time down in this notebook.
## Expected total run time for 100 sims, using 2 threads is 7.44 seconds.
# Here p would be 0, which would mean we do not have a power law at all
troub.pl$setXmin(est1$xmin)
troub.pl$setPars(est1$pars)
bs_p2 = poweRlaw::bootstrap_p(troub.pl, no_of_sims=100, threads=2)
## Expected total run time for 100 sims, using 2 threads is 6.18 seconds.
# Still 0, even after changing the parameters

Power-law frequency (n. witn.) / rank (Zipf type)

# Home-made Zipf-style plots: witness count against frequency rank
# (ties broken at random), in linear then log/log scale
plot(x = rank(-textsFreqs[, 2], ties.method = "random"),
     y = textsFreqs[, 2])

plot(x = rank(-textsFreqs[, 2], ties.method = "random"),
     y = textsFreqs[, 2],
     log = "xy")

# zipfR

Voir zipfR.

Fitting a distribution.

Pour une liste des noms de distributions standards dans R, cf. la doc Distributions {stats}

  • fit a (discrete) power law
fit_pl = igraph::fit_power_law(textsFreqs[,2], implementation = "R.mle")
fit_pl
## 
## Call:
## stats4::mle(minuslogl = mlogl, start = list(alpha = start))
## 
## Coefficients:
##    alpha 
## 1.674228
stats4::logLik(fit_pl)
## 'log Lik.' -6094.22 (df=1)
fit_pl = igraph::fit_power_law(textsFreqs[,2], implementation = "plfit")
fit_pl
## $continuous
## [1] FALSE
## 
## $alpha
## [1] 15.73528
## 
## $xmin
## [1] 22
## 
## $logLik
## [1] -35.22062
## 
## $KS.stat
## [1] 0.05434208
  • as a discrete variable
## Le chargement a nécessité le package : MASS
## Le chargement a nécessité le package : survival
plotdist(textsFreqs[,2], histo = TRUE, demp = TRUE, discrete = TRUE)

descdist(textsFreqs[,2], discrete=TRUE, boot = 500)

## summary statistics
## ------
## min:  1   max:  28 
## median:  2 
## mean:  4.614592 
## estimated sd:  5.118794 
## estimated skewness:  1.640177 
## estimated kurtosis:  4.977446
#fit_exp = fitdist(textsFreqs[,2], "exp", discrete = TRUE)
#plot(fit_exp)

#dmyPlaw = function(a,x,k) a * x^-k 
#fit_pl = fitdist(textsFreqs[,2], "myPlaw", start = list(a = 1, k = 1))

# Try Poisson, binomial, negative binomial, geometric, hypergeometric
fit_p = fitdist(textsFreqs[,2], "pois") # -> not it.
fit_p
## Fitting of the distribution ' pois ' by maximum likelihood 
## Parameters:
##        estimate Std. Error
## lambda 4.614592  0.0427754
# So lambda is estimated at 4.61, with standard error 0.043 (see output above)
plot(fit_p)

#fit_b = fitdist(wl, "binom", lower = c(0, 0))
fit_nb = fitdist(textsFreqs[,2], "nbinom") # -> still no.
plot(fit_nb)

fit_g = fitdist(textsFreqs[,2], "geom")# -> nope
plot(fit_g)

# And the distributions that need extra parameters
#prefit(textsFreqs[,2], "hyper")
#fit_hg = fitdist(textsFreqs[,2], "hyper")

# Compare the fitted densities of the three candidate distributions
denscomp(list(fit_p, fit_nb, fit_g))

#cdfcomp (list(fit_w, fit_g, fit_ln), legendtext = plot.legend)
  • as a continuous variable:
# En le traitant comme variable continue
plotdist(textsFreqs[,2], histo = TRUE, demp = TRUE, discrete = FALSE)

descdist(textsFreqs[,2], discrete=FALSE, boot = 500)

## summary statistics
## ------
## min:  1   max:  28 
## median:  2 
## mean:  4.614592 
## estimated sd:  5.118794 
## estimated skewness:  1.640177 
## estimated kurtosis:  4.977446

Regression

plot(log(numFreqs[,1]), log(numFreqs[,2]), main = "Distr. of wits per troubadour text - log / log plot", xlab = "log(number of witnesses)", ylab = "log(freqs)")
colnames(numFreqs)[1] = "NbWits"
reg = lm(log(Freq) ~ log(NbWits), data = as.data.frame(numFreqs))
abline(reg, col="red")
summary(reg)
## 
## Call:
## lm(formula = log(Freq) ~ log(NbWits), data = as.data.frame(numFreqs))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8901 -0.3787  0.1810  0.5814  0.9218 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.6040     0.4662  16.309 7.81e-15 ***
## log(NbWits)  -1.7751     0.1843  -9.631 6.82e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7907 on 25 degrees of freedom
## Multiple R-squared:  0.7877, Adjusted R-squared:  0.7792 
## F-statistic: 92.75 on 1 and 25 DF,  p-value: 6.817e-10
mtext(text = paste("Adj. R² ", round(summary(reg)$adj.r.squared, digits = 3 )), line = 4, side= 1, cex=1)

Chansons de geste

##Répartition par siècle des manuscrits

barplot(RepSiecle[,1], names.arg = rownames(RepSiecle), main = "Epic mss per century", sub="From Duggan (1982), Careri (2006) and Careri et al. (2011)")

barplot(RepTranche[,3], names.arg = rownames(RepTranche), main = "Epic mss per half-century", sub="From Duggan (1982), Careri (2006) and Careri et al. (2011)")

Distr. of witnesses per chanson de geste

barplot(RepTrad[,1], names.arg = rownames(RepTrad), main = "Distr. of wits per epic text", sub = "from Vitale-Brovarone (2006)", ylab = "Freqs", xlab = "nb. witnesses")

plot(RepTrad[,1], type = "h", col = "red", lwd = 10, main = "Distr. of wits per epic text \n Data: Vitale-Brovarone (2006)", xlab = "number of witnesses", ylab = "Freqs", sub = paste("N = ", sum(RepTrad[,1])), xlim = c(0.1,30), ylim = c(1,80), xaxs = "i", yaxs = "i")

#mtext(text = "nb. de témoins", line = 2, side= 1, cex=1.2)
plot(density(RepTrad[,1]), main = "Kernel Density Estimation", sub = "Distribution of witnesses per epic text")

# Expand the witness-count frequency table into one value per text:
# the value i (= number of witnesses) repeated RepTrad[i, 1] times.
# rep(..., times = ...) replaces the former quadratic c()-append loop.
w = rep(seq_len(nrow(RepTrad)), times = RepTrad[, 1])
summary(w)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   3.498   4.000  29.000

Power-law probability distribution (Pareto type)

plot(prop.table(RepTrad[,1]), xlab = "n. witn.", ylab = "mass", main = "Distr. of witness per epic text", type = "h")
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

# la même chose en log
plot(prop.table(RepTrad[,1]), xlab = "n. witn.", ylab = "mass", xlim = c(1,30), ylim = c(0.01, 0.4), log = "xy")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 12 y values <= 0 omitted from
## logarithmic plot
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.3, add = TRUE, col = "red")

###Description de la distribution

table(RepTradData[,1])
## 
##  1  2  3  4  5  6  7  8  9 10 11 13 14 15 17 22 29 
## 76 44 28 11  8  8 10  1  3  4  2  3  1  1  3  1  1
#plot(RepTradData[,1], ylab = "Nb. de témoins", main = "Distrib. des chansons selon leur nb. de témoins",  sub="d'après Vitale-Brovarone (2006)")

#Graphes log/log
#plot(log(RepTradData[,1]), ylab = "Nombre de mss", main = "Distrib. des chansons selon leur nb. de mss",  sub="d'après Vitale-Brovarone (2006)")
#Vaut-il mieux les faire en barplot?
RepTrad2 = RepTrad2[RepTrad2[,1] > 0 , ]

plot(RepTrad2[,2], RepTrad2[,1], main="Fréquence en chansons pour le nb. de témoins", ylab="Fréq. des chansons", xlab="Nb. de témoins")

plot(RepTrad2[,2], RepTrad2[,1], log="xy",  main = "Distr. of wits per epic text - logarithmic scale", xlab = "number of witnesses", ylab = "freqs")

#hist(RepTradData[,1], breaks = 1:30)
#hist(log(RepTradData[,1]))
#plot(RepTrad2, log="xy", type='h')
#boxplot(RepTradData)
summary(RepTradData)
##  Nb.de.témoins   
##  Min.   : 1.000  
##  1st Qu.: 1.000  
##  Median : 2.000  
##  Mean   : 3.498  
##  3rd Qu.: 4.000  
##  Max.   :29.000
# Geometric mean of the number of witnesses per chanson
exp(mean(log(RepTradData[,1])))
## [1] 2.33514

Lost works and manuscripts

Tradition status

Status of the tradition for major versions,

epicWorks = read.csv("data/geste_works.csv")
# Quick reordering
epicWorks[, "Statut.trad."] = factor(epicWorks[, "Statut.trad."], levels = c("kept", "fragm", "lost"))
epicWorks[, "Trad.hypoth"] = factor(epicWorks[, "Trad.hypoth"], levels = c("hypoth", "attested"))

ggplot(data=epicWorks, aes(Statut.trad.)) + geom_bar(aes(fill=Trad.hypoth), color = "black") + ggtitle("Gestes: Major versions") +
      xlab("Status of the tradition") + theme(axis.text.x = element_text(size = rel(1.2), face = "bold")) +  scale_fill_manual(values=c("darkgray", "darkred"))

and for works,

# Aggregate version-level status to work-level status.
# A work is "kept" if any version is kept, else "fragm" if any is fragmentary,
# else "lost"; a lost work is "hypoth" only if no version is attested.
meta = unique(epicWorks[, "Meta"])
worksStatus = matrix(nrow = length(meta), ncol = 3, dimnames = list(NULL, c("Meta", "StatusTrad", "Trad.hypoth")))
worksStatus[, "Trad.hypoth"] = "attested"
for (i in seq_along(meta)){
  worksStatus[i, "Meta"] = as.character(meta[i])
  # Hoisted: all versions of this work (was recomputed at every test)
  versions = epicWorks[epicWorks[, "Meta"] == meta[i], ]
  if ("kept" %in% versions[, "Statut.trad."]) {
    worksStatus[i, "StatusTrad"] = "kept"
  } else if ("fragm" %in% versions[, "Statut.trad."]) {
    worksStatus[i, "StatusTrad"] = "fragm"
  } else if ("lost" %in% versions[, "Statut.trad."]) {
    worksStatus[i, "StatusTrad"] = "lost"
    if (!"attested" %in% versions[, "Trad.hypoth"]) {
      worksStatus[i, "Trad.hypoth"] = "hypoth"
    }
  }
}
worksStatus = as.data.frame(worksStatus)

# Quick reordering
worksStatus[, "StatusTrad"] = factor(worksStatus[, "StatusTrad"], levels = c("kept", "fragm", "lost"))
worksStatus[, "Trad.hypoth"] = factor(worksStatus[, "Trad.hypoth"], levels = c("hypoth", "attested"))

ggplot(data=worksStatus, aes(StatusTrad)) + geom_bar(aes(fill=Trad.hypoth), color = "black") + ggtitle("Gestes: Works") +
      xlab("Status of the tradition") + theme(axis.text.x = element_text(size = rel(1.2), face = "bold")) +  scale_fill_manual(values=c("darkgray", "darkred"))

Distribution by century

ggplot(data=epicWorks, aes(StandDate)) + geom_bar(aes(fill=Statut.trad.), color = "black") + ggtitle("Gestes: Major versions by century") +
      xlab("Status of the tradition") + theme(axis.text.x = element_text(size = rel(1.2), face = "bold")) 

Catalogues: Corpus of British Medieval libraries catalogues

# Epic manuscripts in British medieval library catalogues:
# 8 identified MSS, 33 unidentified MSS, 4 with text (and MS) unidentified.
# Fixed typo in the user-facing row label: "unindentif." -> "unidentif."
catalogues = matrix(data = c(8, 33, 4), ncol = 1, dimnames = list(c("Ms. identif.", "Ms. unidentif.", "Text (& ms.) unidentif."), "counts"))

barplot(t(catalogues), main = "Epic MSS in British Med. Catalogues", sub = "No identif. (lost?) for c. 75%")

# Alternative version with GGPlot, faking actual data

catalogues = matrix(nrow = (8+33+4), ncol = 2, 
       data = c(rep("identified", 8), rep("unidentif.", 37), 
                rep("known", (8+33)), rep("unknown", 4)), 
       dimnames = list(NULL, c("MS.", "Text"))
       )
catalogues = as.data.frame(catalogues)

ggplot(data=catalogues, aes(MS.)) + geom_bar(aes(fill=Text), color = "black") + ggtitle("Epic MSS in British Med. Catalogues") + xlab("No identif. (lost?) for c. 75%") + theme(axis.text.x = element_text(size = rel(1.2), face = "bold"))

Hypothèses sur les mss perdus

Si l’on suppose un taux de décimation d’un manuscrit conservé sur 1000 copiés

# Assumed decimation rate: 1 extant manuscript per 1000 copied
Decimation = 1000
RepTradSuppose = RepTrad2
# Vectorized column scaling replaces the former row-by-row loop
RepTradSuppose[, 2] = RepTradSuppose[, 2] * Decimation
plot(log(RepTradSuppose[,2]), log(RepTradSuppose[,1]), main=paste('Fréquence en chansons pour le nb. de témoins \n Décimation supposée:', Decimation-1, 'sur', Decimation), ylab="log(Fréq. des chansons)", xlab="log(Nb. de témoins)", xlim = c(0,10), ylim=c(0,20))
reg = lm(log(Fréquence..chansons.) ~ log(Nb..de.témoins), data=RepTradSuppose)
abline(reg, untf = TRUE, col="red")
mtext(paste("R² ajusté", round(summary(reg)$adj.r.squared, digits = 4)), side = 1, line=4)

#Régression
summary(reg)
## 
## Call:
## lm(formula = log(Fréquence..chansons.) ~ log(Nb..de.témoins), 
##     data = RepTradSuppose)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.45519 -0.18416  0.02561  0.35412  0.74680 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          14.6108     1.4251  10.252 3.60e-08 ***
## log(Nb..de.témoins)  -1.4638     0.1585  -9.234 1.41e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.557 on 15 degrees of freedom
## Multiple R-squared:  0.8504, Adjusted R-squared:  0.8404 
## F-statistic: 85.27 on 1 and 15 DF,  p-value: 1.411e-07

Œuvres médiévales latines à succès (FAMA)

Situation un peu différente ici, car la base n’enregistre que (une sélection d’) œuvres latines à succès.

# plot it
plot(textsFreqs[,2])

#barplot(textsFreqs[,2])
# hist(textsFreqs[,2], breaks = seq(min(textsFreqs[,2])-0.5, max(textsFreqs[,2])+0.5, by=1), main = "Distribution of witnesses per text", xlab = "number of witnesses", include.lowest = TRUE)
# or, better
# NOTE(review): "successfull" in the plot title below is a typo ("successful");
# left as-is here since it is a runtime string
plot(table(textsFreqs[,2]), type = "h", col = "red", lwd = 10, main = "Distr. of witnesses per mediolatin successfull text", xlab = "number of witnesses", ylab = "Freqs", sub = paste("N = ", length(textsFreqs[,2])))

#boxplot(textsFreqs[,2])
#summary(textsFreqs[,2])
# geometric mean
exp(mean(log(textsFreqs[,2])))
## [1] 62.48062
# frequencies for each numeric value
numFreqs = table(textsFreqs[,2])
numFreqs = as.data.frame(numFreqs)
# Coerce both columns (witness count and frequency) to numeric for log plotting
numFreqs = sapply(numFreqs, as.numeric)
# NOTE(review): the title below says "troubadour text" but this section is FAMA
# (mediolatin texts) — looks like a stale copy-paste; confirm and fix the label
plot(numFreqs[,1], numFreqs[,2], log = "xy", main = "Distr. of wits per troubadour text - logarithmic scale", xlab = "number of witnesses", ylab = "freqs")

Power-law probability distribution (Pareto type)

# or rather, mass
plot(prop.table(table(textsFreqs[,2])), xlab = "n. witn.", ylab = "mass", main = "Fama")
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.4, add = TRUE, col = "red")

# la même chose en log
plot(prop.table(table(textsFreqs[,2])), xlab = "n. witn.", ylab = "mass", xlim = c(1,4000), ylim = c(0.01, 0.06), log = "xy", main = "Fama", sub = "log/log plot")
#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
curve(0.4 * x^-1.4, add = TRUE, col = "red")

Incunabula printed in Italy

Copies per work

Number of copies per work

edFreqs = table(ISTC_wits[,"text"])
booksPerWorks = as.data.frame(edFreqs)
#View(edFreqs[with(edFreqs, order(-edFreqs[,2])), ])
plot(table(booksPerWorks[,2]), type = "h", col = "red", lwd = 10, xlab = "n. copies", main = "Incunabula printed in Italy \n Extant copies per work", ylab = "Freqs", sub = paste("Source: ISTC  -- N = ", sum(booksPerWorks[,2])))

What are the texts with most books ?

#View(edFreqs)

Copies per edition

Extant copies per edition

edFreqs = table(ISTC_books[,"X_id"])
edFreqs = as.data.frame(edFreqs)
#edFreqs[with(edFreqs, order(-edFreqs[,2])), ]

plot(table(edFreqs[,2]), type = "h", col = "red", lwd = 10, xlab = "n. copies", main = "Incunabula printed in Italy \n Extant copies per edition", ylab = "Freqs", sub = paste("Source: ISTC  -- N = ", length(edFreqs[,2])))

plot(prop.table(table(edFreqs[,2])), xlab = "n. copies", ylab = "mass", main = "Incunabula printed in Italy \n Extant copies per edition")

#hist(textsFreqs[,2], probability = TRUE)
# Try to fit a power-law # en bonne logique, barres plutôt que ligne
#curve(0.4 * x^-1.3, add = TRUE, col = "red")

Number of edition per work

EdWorks = ISTC_wits[, c("X_id", "text")]
EdWorks = unique(EdWorks)
EdWorks = as.data.frame(table(EdWorks[,2]))
#View(EdWorks[with(EdWorks, order(-EdWorks[,2])), ])

plot(table(EdWorks[,2]), type = "h", col = "red", lwd = 10, xlab = "n. editions", main = "Incunabula printed in Italy \n Editions (with extant copies) per work", ylab = "Freqs", sub = paste("Source: ISTC  -- N = ", length(EdWorks[,2])))

Text per author

# Unique (author, text) pairs from the Italian incunabula witnesses
Incu_autsTexts = unique(ISTC_wits[, c("data.author", "text")])
# Drop anonymous works (empty author field)
Incu_autsTexts = Incu_autsTexts[Incu_autsTexts[, "data.author"] != "", ]
# Number of texts per author
table_Incu_autsTexts = table(Incu_autsTexts[, 1])
# Drop zero-count entries (empty factor levels left over after filtering)
table_Incu_autsTexts = table_Incu_autsTexts[table_Incu_autsTexts > 0]
#
plot(table(table_Incu_autsTexts), type = "h", col = "red", lwd = 10, main = "Distr. of texts per author", xlab = "number of texts", ylab = "Freqs", sub = paste("N = ", nrow(Incu_autsTexts)))

Quels sont ceux qui ont écrit le plus de textes ?

head(sort(table_Incu_autsTexts, decreasing = TRUE))
## 
## Sixtus IV, Pont. Max. (formerly Franciscus, Cardinalis de Rovere) 
##                                                                77 
##             Innocentius VIII, Pont. Max. (Giovanni Battista Cibo) 
##                                                                75 
##                                            Savonarola, Hieronymus 
##                                                                69 
##                                                Ubaldis, Baldus de 
##                                                                53 
##                Alexander VI, Pont. Max. (formerly Rodrigo Borgia) 
##                                                                46 
##                                            Cicero, Marcus Tullius 
##                                                                45

Geste + romans

data = read.csv("data/Corpora_Merged_list_with_corrs_2_revu-Guidi.csv")
d = as.data.frame(table(data$TRI))

library(ggplot2)
ggplot(data=d, aes(x=Freq)) + geom_bar(stat = "count") + scale_y_continuous(trans='log10')

Minnesang

# Load the Minnesang traditions scraped from Lyrik des Deutschen Mittelalters
minne = read.csv(file = "data/minnesang/traditions_scraped.csv", header = TRUE, row.names=1,stringsAsFactors = FALSE)
#View(minne)
# Column 4 is read as the witness count per lied — TODO confirm against the CSV header
counts = minne[,4]

Distr. of witnesses per lied

# Distribution of witnesses per Minnesänger lied
barplot(table(counts),  main = "Distr. of wits per Minnesänger text", sub = "from Lyrik des Deutschen Mittelalters", ylab = "Freqs", xlab = "nb. witnesses")

# Fixed: stray empty argument (double comma) and duplicated "per  per" in the title
plot(table(counts), type = "h", col = "red", lwd = 10, main = "Distr. of wits per Minnesänger text \n Data: Lyrik des Deutschen Mittelalters", xlab = "number of witnesses", ylab = "Freqs", sub = paste("N = ", sum(counts)))

#mtext(text = "nb. de témoins", line = 2, side= 1, cex=1.2)
# Fixed: stale copy-paste labels — this plots the witness-count frequency table
# (not a kernel density estimate), and the corpus is Minnesang, not epics
plot(table(counts), main = "Distr. of witnesses per Minnesänger text", sub = "Data: Lyrik des Deutschen Mittelalters")

FIGS FOR PAPER

Fig. 3